In [1]:
from eurostat_dataset import eurostat_dataset # packed in class the method to wrangle eurostat's table 
import pandas as pd
import sqlite3
import plotly.express as px
import plotly.offline as pyo
# The result can be printed to html by nbconvert. For this:
#   - pip install nbconvert -U   in the environment
#   - jupyter nbconvert Eurostat.ipynb --no-input --to html --no-prompt   - in the terminal 
  1. Processing the Final energy consumption data
In [3]:
# uncheck this comment to download dataset to the sqlite database
# eurostat_dataset(code='NRG_BAL_S').WriteToDatabase()
# eurostat_dataset(code='NRG_INF_EPCRW').WriteToDatabase()
# write to database, so you don't need to wait everytime when you restart the kernel or the file
In [5]:
#check the tables that had been written to the database:
def db_tables():
    con = sqlite3.connect('ngr.db')
    cursor = con.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    print(cursor.fetchall())
    con.close
In [6]:
# read table from db to DataFrame
def read_df_from_db(table):
    con = sqlite3.connect('ngr.db')
    cursor = con.cursor()   
    df = pd.read_sql_query("SELECT * FROM "+ table , con)
    con.close
    df.drop(columns=['index'],inplace=True)

    return df
In [23]:
tables = db_tables()
print(f'The list of tables in the local sqlite database {tables}') # we can see the tables in the database 'ngr.db'
[('nrg_bal_s',), ('nrg_inf_epcrw',)]
The list of tables in the local sqlite database None
In [11]:
renew = eurostat_dataset(code='nrg_inf_epcrw').DatasetInfo()
dataset_info=eurostat_dataset(code='NRG_BAL_S').DatasetInfo()
In [15]:
print(f'The first dataset: "{dataset_info[0]}" dates from {dataset_info[1]} to {dataset_info[2]}.')
print(f'The second dataset: "{renew[0]}" dates from {renew[1]} to {renew[2]}.')
The first dataset: "Simplified energy balances" dates from 1990 to 2020.
The second dataset: "Electricity production capacities for renewables and wastes" dates from 1990 to 2020.
In [18]:
#reading the table of the first dataset from local database
bal = read_df_from_db('nrg_bal_s')
balance=bal.copy()
balance = balance.query('unit=="Gigawatt-hour" and siec=="Total" and nrg_bal=="Final consumption - energy use" \
and geo not in ("Euro area - 19 countries  (from 2015)","European Union - 27 countries (from 2020)")')
get_energy = read_df_from_db('nrg_inf_epcrw')

Final consumption - energy use (Gigawatt-hour)

In [19]:
balance.head()
Out[19]:
nrg_bal siec unit geo date value
5778 Final consumption - energy use Total Gigawatt-hour Albania 2020 21474.051
5779 Final consumption - energy use Total Gigawatt-hour Austria 2020 288624.871
5780 Final consumption - energy use Total Gigawatt-hour Bosnia and Herzegovina 2020 46804.808
5781 Final consumption - energy use Total Gigawatt-hour Belgium 2020 360985.528
5782 Final consumption - energy use Total Gigawatt-hour Bulgaria 2020 110633.154
In [20]:
balance.value=pd.to_numeric(balance.value)
balance.date=pd.to_numeric(balance.date)
In [21]:
#I like plotly. It is more interactive)
# https://plotly.com/python/plotly-express/#gallery
pyo.init_notebook_mode() #trying to render the plot in github, but it is warking only in nbviewer!https://nbviewer.org/
fig = px.line(balance, x="date", y="value", color="geo",title="Final consumption - energy use,Gigawatt-hour")

pyo.iplot(fig)
In [24]:
#get population dataset and clean
pop = eurostat_dataset('TPS00001').GetDf()
population=pop.copy()
population['value'] = population.value.astype('str').fillna('').str.extract(r"(\d+)|(\d+)\.")[0].astype('Int32')
population['date'] = pd.to_numeric(population.date)
In [25]:
#merge with balance tables
m_balance = balance.merge(population[["geo","date","value"]],on=['geo',"date"],suffixes=("", "_population"))
m_balance = m_balance.assign(per_capita=lambda x: x.value*1000/x.value_population)
In [26]:
pyo.init_notebook_mode()
fig = px.line(m_balance, x="date", y="per_capita", color="geo",
                title="Final consumption - energy use per capita,Megawatt-hour")

pyo.iplot(fig)
In [27]:
get_energy.head()
Out[27]:
siec plant_tec unit geo date value
0 Solid biofuels Net maximum electrical capacity Megawatt Albania 2020 0.000
1 Solid biofuels Net maximum electrical capacity Megawatt Austria 2020 816.199
2 Solid biofuels Net maximum electrical capacity Megawatt Bosnia and Herzegovina 2020 1.120
3 Solid biofuels Net maximum electrical capacity Megawatt Belgium 2020 563.500
4 Solid biofuels Net maximum electrical capacity Megawatt Bulgaria 2020 15.064
In [28]:
"Gross electricity production"
el_production = bal.query('unit=="Gigawatt-hour" and siec=="Total" and nrg_bal=="Gross electricity production" \
and geo not in ("Euro area - 19 countries  (from 2015)","European Union - 27 countries (from 2020)")')
  1. Energy Consumption, electricity production and electricity production from renewable in Latvia 2020
In [29]:
# As for Latvia in 2020: 
# - Energy consumption: 44172.893 Gigawatt-hour
# - electricity production - 5724.846 Gigawatt-hour. 13% from Energy consumption
# - electricity production from renewable - 3649.529 Gigawatt-hour. 63% from electricity production
#
In [31]:
get_energy_lv2020 = get_energy.query('geo=="Latvia" and date=="2020" and siec in ("Solid biofuels","Biogases","Hydro","Geothermal","Wind","Solar")')
In [37]:
get_energy_lv2020.value/get_energy_lv2020.value.sum()*100
Out[37]:
25      5.262178
197     3.300042
240    86.890047
455     0.000000
498     4.268265
627     0.279468
Name: value, dtype: float64
In [31]:
el_production.query('geo=="Latvia" and date=="2020"')
Out[31]:
nrg_bal siec unit geo date value
52254 Gross electricity production Total Gigawatt-hour Latvia 2020 5724.846
In [32]:
bal.query('unit=="Gigawatt-hour" and nrg_bal=="Gross electricity production" \
and geo=="Latvia" and date=="2020"')
Out[32]:
nrg_bal siec unit geo date value
51006 Gross electricity production Solid fossil fuels Gigawatt-hour Latvia 2020 0.0
51132 Gross electricity production Manufactured gases Gigawatt-hour Latvia 2020 0.0
51256 Gross electricity production Electricity Gigawatt-hour Latvia 2020 None
51378 Gross electricity production Natural gas Gigawatt-hour Latvia 2020 2075.163
51502 Gross electricity production Heat Gigawatt-hour Latvia 2020 None
51624 Gross electricity production Nuclear heat Gigawatt-hour Latvia 2020 0.0
51750 Gross electricity production Oil and petroleum products (excluding biofuel ... Gigawatt-hour Latvia 2020 0.106
51876 Gross electricity production Peat and peat products Gigawatt-hour Latvia 2020 0.048
52002 Gross electricity production Renewables and biofuels Gigawatt-hour Latvia 2020 3649.529
52128 Gross electricity production Oil shale and oil sands Gigawatt-hour Latvia 2020 0.0
52254 Gross electricity production Total Gigawatt-hour Latvia 2020 5724.846
52380 Gross electricity production Non-renewable waste Gigawatt-hour Latvia 2020 0.0
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: